Set working directory

setwd("/Users/veronica/Documents/Rprojects/postdoc Rprojects/ODU_postdoc_Rprojects/AmSam_isotopes/CSIA-AA_AmSam_coral")

AA d13C data

# changed d13C to d13C_AA because will merge with meta data sheet
# that has bulk isotope data
aa.d13C <- read.csv("AA_C_data_RadiceCoral_20230821_JDC.csv", header = TRUE)

aa.d13C[sapply(aa.d13C, is.character)] <- lapply(aa.d13C[sapply(aa.d13C,
    is.character)], as.factor)

head(aa.d13C)
##     Batch         ID Vial_Sample_ID           Species
## 1 Batch 1 DS2_3 Host    DS2_03_Host Leptoseris glabra
## 2 Batch 1 DS2_1 Symb     DS2_01_Sym Leptoseris scabra
## 3 Batch 1 DS2_2 Host    DS2_02_Host Leptoseris glabra
## 4 Batch 1 DS2_3 Symb     DS2_03_Sym Leptoseris glabra
## 5 Batch 1 DS2_7 Host    DS2_07_Host Leptoseris glabra
## 6 Batch 1 DS2_5_Host    DS2_05_Host Leptoseris glabra
##                Fraction_details      Group   Group2   Group3
## 1 Mesophotic, Leptoseris glabra Coral Host                  
## 2 Mesophotic, Leptoseris scabra   Symbiont Symbiont Symbiont
## 3 Mesophotic, Leptoseris glabra Coral Host                  
## 4 Mesophotic, Leptoseris glabra   Symbiont Symbiont Symbiont
## 5 Mesophotic, Leptoseris glabra Coral Host                  
## 6 Mesophotic, Leptoseris glabra Coral Host                  
##                Group4   Group_Reference  Site    Ala   Gly    Thr    Ser    Val
## 1                     Coral Host_Radice Leone  -9.73 15.60  -8.92   0.33 -19.52
## 2 Symbiont-Mesophotic   Symbiont_Radice Leone -11.40 -7.63 -17.17 -37.34 -17.90
## 3                     Coral Host_Radice Leone  -8.34  9.94 -24.97  -1.95 -18.38
## 4 Symbiont-Mesophotic   Symbiont_Radice Leone  -6.94 -0.86 -13.17 -21.89 -19.77
## 5                     Coral Host_Radice Leone -12.07  4.52  -7.07   4.82 -18.92
## 6                     Coral Host_Radice Leone   2.45 12.18  -1.60 -68.98 -17.01
##      Leu    Ile    Nle    Asp    Glu    Phe    Lys   Arg    Ala_sd    Gly_sd
## 1 -29.44 -13.67 -22.88 -19.95 -16.34 -23.81 -14.74    NA 0.3609908 0.2823780
## 2 -28.17 -14.43 -22.24 -15.26 -14.63 -20.95 -13.38    NA 0.3535534 0.3238549
## 3 -22.55 -10.12 -21.55  -6.39 -10.83 -22.52 -11.55    NA 0.6100000 0.2500000
## 4 -23.38 -14.77 -21.39  -8.08 -11.38 -23.53 -13.14    NA 0.3700000 0.3400000
## 5 -26.09 -12.85 -21.56 -14.17 -14.60 -25.77 -14.35    NA 0.5200000 0.2300000
## 6 -23.27  -9.85 -21.18 -12.08 -11.88 -24.69 -11.86 -0.65 0.0800000 0.3400000
##      Thr_sd    Ser_sd    Val_sd    Leu_sd    Ile_sd    Nle_sd    Asp_sd
## 1 0.6358089 0.4367887 0.5924635 0.4558201 0.2234547 0.5530976 0.6675088
## 2 0.8181225 0.1598061 0.6427601 1.2883486 0.3167838 0.7898383 0.7771104
## 3 0.3000000 0.4400000 0.2800000 0.0500000 0.1500000 0.2500000 0.1100000
## 4 0.3800000 0.4700000 0.3400000 0.1100000 0.3100000 0.4200000 0.2700000
## 5 0.2000000 0.5600000 0.4600000 0.0300000 0.1600000 0.1600000 0.0600000
## 6 0.3200000 0.4700000 0.4600000 0.1800000 0.1900000 0.1900000 0.1000000
##      Glu_sd     Phe_sd    Lys_sd Arg_sd
## 1 0.2364663 0.36125937 0.1387420     NA
## 2 0.3047630 0.02474874 0.2234457     NA
## 3 0.2600000 0.16000000 0.0900000     NA
## 4 0.1200000 0.07000000 0.0300000     NA
## 5 0.0400000 0.24000000 0.1700000     NA
## 6 0.1200000 0.04000000 0.1100000     NA

Summary - data set

Two different depths shallow and mesophotic Montipora grisea

aa.d13C %>%
    group_by(Group, Species) %>%
    dplyr::summarise(count = n())
## # A tibble: 8 × 3
## # Groups:   Group [3]
##   Group      Species                   count
##   <fct>      <fct>                     <int>
## 1 Coral Host Leptoseris glabra             5
## 2 Coral Host Leptoseris mycetoseroides     5
## 3 Coral Host Montipora grisea             10
## 4 Plankton   Plankton                      5
## 5 Symbiont   Leptoseris glabra             4
## 6 Symbiont   Leptoseris mycetoseroides     5
## 7 Symbiont   Leptoseris scabra             1
## 8 Symbiont   Montipora grisea             10

meta data

meta <- read.csv("AmericanSamoa_Coral-isotopes_Master_2023-10.csv", header = TRUE)

meta[sapply(meta, is.character)] <- lapply(meta[sapply(meta, is.character)],
    as.factor)

head(meta)
##   Date_collection  Site      Group      Genus    Spp           Species
## 1        20220302 Leone Coral Host Leptoseris glabra Leptoseris glabra
## 2        20220302 Leone Coral Host Leptoseris glabra Leptoseris glabra
## 3        20220302 Leone Coral Host Leptoseris glabra Leptoseris glabra
## 4        20220302 Leone Coral Host Leptoseris glabra Leptoseris glabra
## 5        20220302 Leone Coral Host Leptoseris glabra Leptoseris glabra
## 6        20220302 Leone   Symbiont Leptoseris scabra Leptoseris scabra
##        Depth Depth_m ITS2_top1                ITS2_top1_details ITS2_top2
## 1 Mesophotic    37.3       C21                  C21,C21a,C21.12       C17
## 2 Mesophotic    37.3     C27.1                                        C3*
## 3 Mesophotic    38.1     C27.1                                       C42b
## 4 Mesophotic    38.1        D4                                         D5
## 5 Mesophotic      38     C27.1                                        C3*
## 6 Mesophotic    37.3       C21 C21.14,C21,_C21.12,_C21.16,_C21a          
##   ITS2_other.top Symbiont.genus.dominant Symbiont_Literature Literature.ref
## 1            C3*             Cladocopium                                   
## 2            C3*             Cladocopium                                   
## 3            C3*             Cladocopium                                   
## 4  other D, C15*             Durusdinium                                   
## 5            C3*             Cladocopium                                   
## 6       C3*, C17             Cladocopium                                   
##   Vial_Sample_ID            Notes_2022 mg_tissue_2022         Status_2022
## 1    DS2_02_Host Derivatized July-2022             NA Derivatized- at URI
## 2    DS2_03_Host Derivatized July-2022             NA Derivatized- at URI
## 3    DS2_05_Host Derivatized July-2022             NA Derivatized- at URI
## 4    DS2_07_Host Derivatized July-2022             NA Derivatized- at URI
## 5    DS2_08_Host Derivatized July-2022             NA Derivatized- at URI
## 6     DS2_01_Sym                                 5.13     Acid Hydrolyzed
##       Status_2023 mg_tissue_2023 Tissue.remaining_2023         Sample_ID
## 1 Acid Hydrolyzed           3.13                   Yes Host_AS_S1_DS2_02
## 2 Acid Hydrolyzed           3.29                   Yes Host_AS_S1_DS2_03
## 3 Acid Hydrolyzed           3.35                   Yes Host_AS_S1_DS2_05
## 4 Acid Hydrolyzed           3.12                   Yes Host_AS_S1_DS2_07
## 5 Acid Hydrolyzed           3.38                   Yes Host_AS_S1_DS2_08
## 6                             NA           Tiny amount  Sym_AS_S1_DS2_01
##   CSIA.AA CSIA.AA_Priority.rank   d13C d15N C_microg N_microg  CN Weight_microg
## 1    d13C                     1 -17.98 6.25    392.4     78.8 5.8          1965
## 2    d13C                     2 -18.54 6.64    405.9     87.1 5.4          1919
## 3    d13C                     3 -18.83 6.98    456.8     91.4 5.8          1960
## 4    d13C                     4 -19.84 6.88    452.4     92.4 5.7          2015
## 5    d13C                     5 -17.03 7.00    414.8     85.7 5.6          1982
## 6    d13C                     6     NA   NA       NA       NA  NA            NA
##   C_weight.percent N_weight.percent RNAseq Niche Morphology Size_fraction
## 1             20.0              4.0    Yes                               
## 2             21.2              4.5    Yes                               
## 3             23.3              4.7    Yes                               
## 4             22.5              4.6    Yes                               
## 5             20.9              4.3    Yes                               
## 6               NA               NA                                      
##         Type Genus_species_morph
## 1 Specialist   Leptoseris glabra
## 2 Specialist   Leptoseris glabra
## 3 Specialist   Leptoseris glabra
## 4 Specialist   Leptoseris glabra
## 5 Specialist   Leptoseris glabra
## 6 Specialist   Leptoseris scabra
aa.d13C.meta <- meta %>%
    inner_join(aa.d13C, by = c("Vial_Sample_ID", "Site", "Group", "Species"))

aa.d13C.meta <- droplevels(aa.d13C.meta)

dim(aa.d13C.meta)
## [1] 45 72
select only the essential amino acids
aa.d13C.ess <- aa.d13C.meta %>%
    dplyr::select(Group, Group2, Group3, Species, Depth, Vial_Sample_ID,
        Ile, Leu, Phe, Thr, Val)

# aa.d13C.ess <- aa.d13C.ess %>% filter(AA == 'Ile' | AA == 'Leu' |
# AA == 'Phe' | AA == 'Thr' | AA == 'Val')

aa.d13C.ess <- droplevels(aa.d13C.ess)

head(aa.d13C.ess)
##        Group   Group2   Group3           Species      Depth Vial_Sample_ID
## 1 Coral Host                   Leptoseris glabra Mesophotic    DS2_02_Host
## 2 Coral Host                   Leptoseris glabra Mesophotic    DS2_03_Host
## 3 Coral Host                   Leptoseris glabra Mesophotic    DS2_05_Host
## 4 Coral Host                   Leptoseris glabra Mesophotic    DS2_07_Host
## 5 Coral Host                   Leptoseris glabra Mesophotic    DS2_08_Host
## 6   Symbiont Symbiont Symbiont Leptoseris scabra Mesophotic     DS2_01_Sym
##      Ile    Leu    Phe    Thr    Val
## 1 -10.12 -22.55 -22.52 -24.97 -18.38
## 2 -13.67 -29.44 -23.81  -8.92 -19.52
## 3  -9.85 -23.27 -24.69  -1.60 -17.01
## 4 -12.85 -26.09 -25.77  -7.07 -18.92
## 5  -9.18 -23.05 -24.44  -3.21 -19.51
## 6 -14.43 -28.17 -20.95 -17.17 -17.90

Select Coral Host (consumer) host data only

host.aa.d13C.ess <- subset(aa.d13C.ess, Group == "Coral Host")
host.aa.d13C.ess <- droplevels(host.aa.d13C.ess)

Sources

Relevant source (particulate organic sources from tropical coral reefs) CSIA-AA d13C values from the literature. Only included essential AA data.

# had issues with importing csv can delete Lat and Long columns
# because read.csv() does not like 'degree' symbol also delete 'um'
# (micrometer) symbol to just um also delete columns with '/' (DOI,
# etc.) and ' ** or otherwise use fileEncoding='latin1' and that
# worked
sources <- read.csv("Sources_EAA_literature.csv", header = TRUE, fileEncoding = "latin1")

sources[sapply(sources, is.character)] <- lapply(sources[sapply(sources,
    is.character)], as.factor)

head(sources)
##   Group        Group2              Group3              Group4 Size_fraction
## 1   POM Phytoplankton Phytoplankton-field Phytoplankton-proxy              
## 2   POM Phytoplankton Phytoplankton-field Phytoplankton-proxy              
## 3   POM Phytoplankton Phytoplankton-field Phytoplankton-proxy              
## 4   POM Phytoplankton Phytoplankton-field Phytoplankton-proxy              
## 5   POM Phytoplankton Phytoplankton-field Phytoplankton-proxy              
## 6   POM Phytoplankton Phytoplankton-field Phytoplankton-proxy              
##   Species                                           Notes Sample_ID Sample.Size
## 1         Pelagic copepod proxy for pelagic phytoplankton                     3
## 2         Pelagic copepod proxy for pelagic phytoplankton                     3
## 3         Pelagic copepod proxy for pelagic phytoplankton                     3
## 4         Pelagic copepod proxy for pelagic phytoplankton                     3
## 5         Pelagic copepod proxy for pelagic phytoplankton                     3
## 6         Pelagic copepod proxy for pelagic phytoplankton                     3
##   Country_Island          Location  Environment  Group_Reference       Lat
## 1   Saudi Arabia        Ron's Reef   Shelf reef Plankton_McMahon 20.1348¡N
## 2   Saudi Arabia         LJ's Reef   Shelf reef Plankton_McMahon 19.9582¡N
## 3   Saudi Arabia         Saut Reef   Shelf reef Plankton_McMahon 19.8875¡N
## 4   Saudi Arabia        Brown Reef   Shelf reef Plankton_McMahon 19.8556¡N
## 5   Saudi Arabia       Canyon Reef Oceanic reef Plankton_McMahon 19.8923¡N
## 6   Saudi Arabia ShiÕb Sulaym Reef Oceanic reef Plankton_McMahon 19.8980¡N
##        Long                      Reference                        DOI
## 1 40.1012¡W McMahon et al. 2016 Oecologia   10.1007/s00442-015-3475-3
## 2 40.2673¡W McMahon et al. 2016 Oecologia   10.1007/s00442-015-3475-7
## 3 40.1565¡W McMahon et al. 2016 Oecologia  10.1007/s00442-015-3475-11
## 4 40.2162¡W McMahon et al. 2016 Oecologia  10.1007/s00442-015-3475-15
## 5 39.9591¡W McMahon et al. 2016 Oecologia  10.1007/s00442-015-3475-19
## 6 40.0062¡W McMahon et al. 2016 Oecologia  10.1007/s00442-015-3475-23
##   Distance.to.Samoa..Km..apprx....14.342...170.789.   Thr Thr_SD   Ile Ile_SD
## 1                                             16600  -9.6    0.5 -17.8    0.5
## 2                                             16600  -8.0    0.2 -17.9    0.4
## 3                                             16600  -7.7    0.4 -14.0    1.1
## 4                                             16600  -7.8    0.6 -16.3    0.5
## 5                                             16600 -10.4    1.0 -16.6    0.8
## 6                                             16600  -9.6    1.0 -16.1    0.2
##     Val Val_SD   Phe Phe_SD   Leu Leu_SD
## 1 -23.2    0.3 -24.7    0.4 -25.5    0.2
## 2 -23.2    0.3 -24.9    0.2 -26.6    0.3
## 3 -21.0    1.1 -23.8    1.0 -23.3    0.9
## 4 -22.9    0.1 -23.8    0.8 -24.8    0.3
## 5 -20.5    1.3 -24.1    0.5 -25.3    0.3
## 6 -19.9    0.9 -23.4    0.7 -24.8    0.4
levels(sources$Group)
## [1] "Detritus"   "Macroalgae" "Plankton"   "POM"        "Symbiont"

Some are phytoplankton samples from culture (Stahl et al. 2023)

Filter out irrelevant data

sources <- subset(sources, Group != "Macroalgae")  # not a food source for coral
sources <- subset(sources, Notes != "15C")  # temperature
sources <- subset(sources, Species != "Artemia salina")  # also missing one AAess value, NA
sources <- subset(sources, Reference != "Shih et al. 2019 Microbial Ecology")  # missing one AAess
# sources <- subset(sources, Reference != 'Stahl et al. 2023 L&O') #
# culture phytoplankton
sources <- droplevels(sources)
sources %>%
    group_by(Group, Environment) %>%
    dplyr::summarise(count = n())
## # A tibble: 7 × 3
## # Groups:   Group [4]
##   Group    Environment  count
##   <fct>    <fct>        <int>
## 1 Detritus Oceanic reef    10
## 2 Detritus Shelf reef       4
## 3 Plankton Oceanic reef    10
## 4 POM      Culture         27
## 5 POM      Oceanic reef    21
## 6 POM      Shelf reef       4
## 7 Symbiont Oceanic reef    11

Separate only source data

Add our plankton and symbiont data to source data

sources.Samoa <- subset(aa.d13C.meta, Group != "Coral Host")
sources.Samoa <- droplevels(sources.Samoa)
dim(sources.Samoa)
## [1] 25 72

Merge our source data with literature sources

sources.all <- full_join(sources, sources.Samoa, by = c("Group", "Species",
    "Group_Reference", "Group2", "Group3", "Group4", "Thr", "Ile", "Val",
    "Leu", "Phe", "Sample_ID", "Size_fraction"))
dim(sources.all)
## [1] 112  87
sources.all %>%
    group_by(Group) %>%
    dplyr::summarise(count = n())
## # A tibble: 4 × 2
##   Group    count
##   <fct>    <int>
## 1 Detritus    14
## 2 Plankton    15
## 3 POM         52
## 4 Symbiont    31
sources.all %>%
    group_by(Group2) %>%
    dplyr::summarise(count = n())
## # A tibble: 4 × 2
##   Group2        count
##   <fct>         <int>
## 1 Detritus         14
## 2 Phytoplankton    52
## 3 Symbiont         31
## 4 Zooplankton      15
sources.all %>%
    group_by(Group3) %>%
    dplyr::summarise(count = n())
## # A tibble: 7 × 2
##   Group3                       count
##   <fct>                        <int>
## 1 Detritus                        14
## 2 Phytoplankton-Diatom            15
## 3 Phytoplankton-Dinoflagellate    12
## 4 Phytoplankton-field             25
## 5 Symbiont                        26
## 6 Zooplankton                     15
## 7 Symbiont-Shallow-cave            5
sources.all$Group3 <- factor(sources.all$Group3, levels = c("Detritus",
    "Phytoplankton-Diatom", "Phytoplankton-Dinoflagellate", "Phytoplankton-field",
    "Symbiont", "Symbiont-Shallow-cave", "Zooplankton"))
sources.all$Group4 <- factor(sources.all$Group4, levels = c("Detritus",
    "Phytoplankton-Cyanobacteria", "Phytoplankton-Diatom", "Phytoplankton-Dinoflagellate",
    "Phytoplankton-POM", "Phytoplankton-proxy", "Symbiont-Pocillopora-Palmyra",
    "Symbiont-Mesophotic", "Symbiont-Shallow", "Symbiont-Shallow-cave",
    "Zooplankton-Palmyra", "Zooplankton-AmSam"))

Normalize source data by the mean of all AAess from each source group/study

sources.all %>%
    group_by(Group_Reference) %>%
    dplyr::summarise(count = n())
## # A tibble: 10 × 2
##    Group_Reference   count
##    <fct>             <int>
##  1 Detritus_McMahon      8
##  2 Detritus_Tietbohl     6
##  3 Plankton_Fox         20
##  4 Plankton_McMahon      8
##  5 Plankton_Wall         1
##  6 POM_Fox               8
##  7 POM_Stahl            27
##  8 POM_Tietbohl          9
##  9 Plankton_Radice       5
## 10 Symbiont_Radice      20
# # need to use mean() instead of rowMeans() # because applying a
# function one row at a time, and so the x input is a 1-dimensional
# vector.  sources.all <- sources.all %>% group_by(Group_Reference)
# %>% dplyr::mutate(EAAmean=rowMeans(c(Thr,Ile,Val,Phe,Leu)), na.rm =
# TRUE)
sources.all <- sources.all %>%
    group_by(Group_Reference) %>%
    dplyr::mutate(EAAmean = mean(c(Thr, Ile, Val, Phe, Leu)))

View column to see that EAAmean was averaged across Thr,Ile,Val,Phe,Leu for each Group_Reference (confirmed with raw data)

sources.all$EAAmean
##   [1] -18.93500 -18.93500 -18.93500 -18.93500 -18.93500 -18.93500 -18.93500
##   [8] -18.93500 -11.58000 -11.58000 -11.58000 -11.58000 -11.58000 -11.58000
##  [15] -11.58000 -11.58000 -23.92600 -17.34700 -17.34700 -17.34700 -17.34700
##  [22] -17.34700 -17.34700 -17.34700 -17.34700 -17.34700 -19.91900 -19.91900
##  [29] -19.91900 -19.91900 -19.91900 -19.91900 -19.91900 -19.91900 -17.12781
##  [36] -17.12781 -17.12781 -17.74816 -17.74816 -17.74816 -17.74816 -17.74816
##  [43] -17.74816 -17.12781 -17.12781 -17.12781 -17.12781 -17.12781 -17.12781
##  [50] -14.89926 -14.89926 -14.89926 -14.89926 -14.89926 -14.89926 -14.89926
##  [57] -14.89926 -14.89926 -14.89926 -14.89926 -14.89926 -14.89926 -14.89926
##  [64] -14.89926 -14.89926 -14.89926 -14.89926 -14.89926 -14.89926 -14.89926
##  [71] -14.89926 -14.89926 -14.89926 -14.89926 -14.89926 -14.89926 -17.34700
##  [78] -17.34700 -17.34700 -17.34700 -17.34700 -17.34700 -17.34700 -17.34700
##  [85] -17.34700 -17.34700 -17.34700 -18.36446 -18.36446 -18.36446 -18.36446
##  [92] -18.36446 -18.36446 -18.36446 -18.36446 -18.36446 -18.36446 -18.36446
##  [99] -18.36446 -18.36446 -18.36446 -18.36446 -18.36446 -18.36446 -18.36446
## [106] -18.36446 -18.36446 -19.30631 -19.30631 -19.30631 -19.30631 -19.30631

Normalize for each AAess (EAA)

sources.all <- sources.all %>%
    group_by(Group_Reference) %>%
    dplyr::mutate(Thr_EAAn = Thr - EAAmean)

sources.all <- sources.all %>%
    group_by(Group_Reference) %>%
    dplyr::mutate(Ile_EAAn = Ile - EAAmean)

sources.all <- sources.all %>%
    group_by(Group_Reference) %>%
    dplyr::mutate(Val_EAAn = Val - EAAmean)

sources.all <- sources.all %>%
    group_by(Group_Reference) %>%
    dplyr::mutate(Phe_EAAn = Phe - EAAmean)

sources.all <- sources.all %>%
    group_by(Group_Reference) %>%
    dplyr::mutate(Leu_EAAn = Leu - EAAmean)
head(sources.all[, 88:93])
## # A tibble: 6 × 6
##   EAAmean Thr_EAAn Ile_EAAn Val_EAAn Phe_EAAn Leu_EAAn
##     <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
## 1   -18.9     9.33     1.13   -4.27     -5.76    -6.57
## 2   -18.9    10.9      1.04   -4.27     -5.96    -7.67
## 3   -18.9    11.2      4.93   -2.07     -4.87    -4.37
## 4   -18.9    11.1      2.63   -3.96     -4.87    -5.87
## 5   -18.9     8.53     2.33   -1.57     -5.17    -6.37
## 6   -18.9     9.33     2.83   -0.965    -4.46    -5.87

PCA - sources - exploratory

PCA analysis, this is just for a quick visual to see the stress vectors

# by default, the function PCA() [in FactoMineR], standardizes the
# data automatically during the PCA; so you don’t need do this
# transformation before the PCA
pca.sources <- PCA(sources.all[, 89:93], scale.unit = TRUE, graph = TRUE)

pca.sources
## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 112 individuals, described by 5 variables
## *The results are available in the following objects:
## 
##    name               description                          
## 1  "$eig"             "eigenvalues"                        
## 2  "$var"             "results for the variables"          
## 3  "$var$coord"       "coord. for the variables"           
## 4  "$var$cor"         "correlations variables - dimensions"
## 5  "$var$cos2"        "cos2 for the variables"             
## 6  "$var$contrib"     "contributions of the variables"     
## 7  "$ind"             "results for the individuals"        
## 8  "$ind$coord"       "coord. for the individuals"         
## 9  "$ind$cos2"        "cos2 for the individuals"           
## 10 "$ind$contrib"     "contributions of the individuals"   
## 11 "$call"            "summary statistics"                 
## 12 "$call$centre"     "mean of the variables"              
## 13 "$call$ecart.type" "standard error of the variables"    
## 14 "$call$row.w"      "weights for the individuals"        
## 15 "$call$col.w"      "weights for the variables"

Eigenvalues / variances

# examine the eigenvalues to determine the number of principal
# components to be considered

# get_eigenvalue(pca.sources): Extract the eigenvalues/variances of
# principal components

# eigenvalues measure the amount of variation retained by each
# principal component Eigenvalues are large for the first PCs and
# small for the subsequent PCs the first PCs corresponds to the
# directions with the maximum amount of variation in the data set

# An eigenvalue > 1 indicates that PCs account for more variance than
# accounted by one of the original variables in standardized data
# This is commonly used as a cutoff point for which PCs are retained
# This holds true only when the data are standardized.

eig.val <- get_eigenvalue(pca.sources)
eig.val
##       eigenvalue variance.percent cumulative.variance.percent
## Dim.1  3.0353353        60.706706                    60.70671
## Dim.2  0.8720099        17.440197                    78.14690
## Dim.3  0.5663010        11.326020                    89.47292
## Dim.4  0.2884903         5.769807                    95.24273
## Dim.5  0.2378635         4.757270                   100.00000
pca.sources <- prcomp(sources.all[, 89:93], scale. = TRUE, center = TRUE)  #everything
summary(pca.sources)
## Importance of components:
##                           PC1    PC2    PC3    PC4     PC5
## Standard deviation     1.7422 0.9338 0.7525 0.5371 0.48771
## Proportion of Variance 0.6071 0.1744 0.1133 0.0577 0.04757
## Cumulative Proportion  0.6071 0.7815 0.8947 0.9524 1.00000

Visualize the eigenvalues

SCREE plot - how many dimensions and what percent represent the data
fviz_eig(pca.sources, addlabels = TRUE, ylim = c(0, 100))

# get_pca_ind(res.pca), get_pca_var(res.pca): Extract the results for
# individuals and variables, respectively provides a list of matrices
# containing all the results for the active variables (coordinates,
# correlation between variables and axes, squared cosine and
# contributions)
var <- get_pca_var(pca.sources)
# Coordinates - var$coord: coordinates of variables to create a
# scatter plot head(var$coord) Cos2: quality on the factor map -
# var$cos2: quality of representation for variables on the factor map
# It’s calculated as the squared coordinates: var.cos2 = var.coord *
# var.coord. head(var$cos2)
# Contributions to the principal components (in percentage) of the
# variables to the principal components The contribution of a
# variable (var) to a given principal component is (in percentage) :
# (var.cos2 * 100) / (total cos2 of the component)
head(var$contrib)
##             Dim.1      Dim.2      Dim.3       Dim.4     Dim.5
## Thr_EAAn 15.44378 21.5356384 58.5271809  2.58455416  1.908848
## Ile_EAAn 17.60274 31.5599542 16.4340315 30.79962124  3.603649
## Val_EAAn 21.17164 25.3926100  0.1935525 16.20450658 37.037691
## Phe_EAAn 23.65026  0.3816408 22.1068181 50.36839977  3.492882
## Leu_EAAn 22.13158 21.1301566  2.7384170  0.04291825 53.956930

Quality of representation

# The quality of representation of the variables on factor map is
# called cos2 (square cosine, squared coordinates)

# The closer a variable is to the circle of correlations, the better
# its representation on the factor map (and the more important it is
# to interpret these components) Variables that are closed to the
# center of the plot are less important for the first components

# library('corrplot') corrplot(var$cos2, is.corr=FALSE)

# plot variables fviz_pca_var(pca.sources, col.var = 'black')

# Total cos2 of variables on Dim.1 and Dim.2
fviz_cos2(pca.sources, choice = "var", axes = 1:2)

Contributions of variables to PCs

# contributions of variables in accounting for the variability in a
# given principal component are expressed in percentage.

# Variables that do not correlated with any PC or correlated with the
# last dimensions are variables with low contribution and might be
# removed to simplify the overall analysis.

# function corrplot() [corrplot] - highlight the most contributing
# variables for each dimension library('corrplot')
# corrplot(var$contrib, is.corr=FALSE)

# Contributions of variables to PC1
fviz_contrib(pca.sources, choice = "var", axes = 1, top = 10)

# Contributions of variables to PC2
fviz_contrib(pca.sources, choice = "var", axes = 2, top = 10)

# The total contribution to PC1 and PC2 is obtained with the
# following R code:
fviz_contrib(pca.sources, choice = "var", axes = 1:2, top = 10)

# red dashed line on the graph above indicates the expected average
# contribution If the contribution of the variables were uniform, the
# expected value would be 1/length(variables) = 1/10 = 10% For a
# given component, a variable with a contribution larger than this
# cutoff could be considered as important in contributing to the
# component

PCA with ellipses

# pdf('PCA_sources-all_Group2_95-CI.pdf', width=5, height=4, paper='special')

fviz_pca_ind(pca.sources,
             axes = c(1,2),
             pointsize = 3,
             geom.ind = "point", # show points only (but not "text")
             col.ind = sources.all$Group2, 
             #palette = c("black", "darkgreen"),
             #palette = "Dark2",
             addEllipses = TRUE, # Concentration ellipses
             ellipse.level = 0.95,
             ellipse.type = "norm",
             legend.title = "Group2",
             mean.point = FALSE,
             #select.var = list(name = c("Thr_EAAn", "Ile_EAAn", "Val_EAAn", "Phe_EAAn", "Leu_EAAn")),
             #repel = TRUE,
             ggtheme = theme_classic(base_size = 12),
             title = "" 
             )

#dev.off()
fviz_pca_ind(pca.sources,
             axes = c(1,2),
             pointsize = 3,
             geom.ind = "point", # show points only (but not "text")
             col.ind = sources.all$Environment, 
             #palette = c("black", "darkgreen"),
             #palette = "Dark2",
             addEllipses = TRUE, # Concentration ellipses
             ellipse.level = 0.95,
             ellipse.type = "norm",
             legend.title = "Environment",
             mean.point = FALSE,
             ggtheme = theme_classic(base_size = 12),
             title = "" 
             )
## Warning: Removed 25 rows containing missing values (geom_point).

fviz_pca_ind(pca.sources,
             axes = c(1,2),
             pointsize = 3,
             geom.ind = "point", # show points only (but not "text")
             col.ind = sources.all$Group_Reference, 
             #palette = c("black", "darkgreen"),
             #palette = "Dark2",
             addEllipses = TRUE, # Concentration ellipses
             ellipse.level = 0.95,
             ellipse.type = "norm",
             legend.title = "Group_Reference",
             mean.point = FALSE,
             ggtheme = theme_classic(base_size = 12),
             title = "" 
             )

# pdf('PCA_sources-all_Group3_50-CI_just4viz.pdf', width=5, height=4, paper='special')

fviz_pca_ind(pca.sources,
             axes = c(1,2),
             pointsize = 3,
             geom.ind = "point", # show points only (but not "text")
             col.ind = sources.all$Group3, 
             palette = c("dodgerblue2", "cadetblue4", "azure4", "black", "chartreuse3", "cyan2", "chocolate1"), #brown1
             #palette = "Dark2",
             addEllipses = TRUE, # Concentration ellipses
             ellipse.level = 0.5,
             ellipse.type = "norm",
             legend.title = "Group3",
             mean.point = FALSE,
             ggtheme = theme_classic(base_size = 12),
             title = "" 
             )

#dev.off()
# pdf('PCA_sources-all_Group3_95-CI.pdf', width=5, height=4, paper='special')

fviz_pca_ind(pca.sources,
             axes = c(1,2),
             pointsize = 3,
             geom.ind = "point", # show points only (but not "text")
             col.ind = sources.all$Group3, 
             palette = c("dodgerblue2", "cadetblue4", "azure4", "black", "chartreuse3", "cyan2", "chocolate1"),
             #palette = "Dark2",
             addEllipses = TRUE, # Concentration ellipses
             ellipse.level = 0.95,
             ellipse.type = "norm",
             legend.title = "Group3",
             mean.point = FALSE,
             ggtheme = theme_classic(base_size = 12),
             title = "" 
             )

#dev.off()
# pdf('PCA_sources-all_Group4_50-CI_just4viz.pdf', width=5, height=4, paper='special')

fviz_pca_ind(pca.sources,
             axes = c(1,2),
             pointsize = 3,
             geom.ind = "point", # show points only (but not "text")
             col.ind = sources.all$Group4, 
             palette = c("dodgerblue2", "grey", "azure4", "cornsilk3", "black", "cadetblue4", "chartreuse3", "darkolivegreen3", "aquamarine2", "cyan2", "chocolate1", "brown1"),
             #palette = "Dark2",
             addEllipses = TRUE, # Concentration ellipses
             ellipse.level = 0.5,
             ellipse.type = "norm",
             legend.title = "Group4",
             mean.point = FALSE,
             ggtheme = theme_classic(base_size = 12),
             title = "" 
             )

#dev.off()

Probably remove Symbiont-Pocillopora-Palmyra - quite distinct from AmSam symbiont data

Our AmSam symbiont data is distinct enough as is - clear separation by both reef depth/environment (shallow, mesophotic, and shallow cave environment) and genus (Leptoseris and Montipora)

Separate our symbiont data by reef depth/environment (shallow, mesophotic, and shallow cave environment)

Can merge Zooplankton data from both in situ reef locations (AmSam and Palmyra)

Can merge Phytoplankton-POM (GFF 0.7 um), Phytoplankton-proxy (McMahon phyto data), and Phytoplankton-Cyanobacteria into one Phytoplankton category

Culture Phytoplankton-Dinoflagellate and culture Phytoplankton-Diatoms more distinct - keep separate?

PC1 <- pca.sources$x[, 1]
PC2 <- pca.sources$x[, 2]
PCAloadings <- data.frame(Variables = rownames(pca.sources$rotation), pca.sources$rotation)

PCA - Final sources

Without Symbiont-Pocillopora-Palmyra

sources.all <- subset(sources.all, Group4 != "Symbiont-Pocillopora-Palmyra")
sources.all <- droplevels(sources.all)
dim(sources.all)
## [1] 101  93

Relevel sources that are similar (same group)

sources.all$Group4 <- revalue(sources.all$Group4, c(`Zooplankton-AmSam` = "Zooplankton",
    `Zooplankton-Palmyra` = "Zooplankton", `Phytoplankton-POM` = "Phytoplankton",
    `Phytoplankton-proxy` = "Phytoplankton", `Phytoplankton-Cyanobacteria` = "Phytoplankton"))
sources.all$Group4 <- factor(sources.all$Group4, levels = c("Detritus",
    "Phytoplankton", "Phytoplankton-Diatom", "Phytoplankton-Dinoflagellate",
    "Symbiont-Mesophotic", "Symbiont-Shallow", "Symbiont-Shallow-cave",
    "Zooplankton"))

PCA - sources

PCA analysis, this is just for a quick visual to see the stress vectors

# by default, the function PCA() [in FactoMineR], standardizes the
# data automatically during the PCA; so you don’t need do this
# transformation before the PCA
pca.sources <- PCA(sources.all[, 89:93], scale.unit = TRUE, graph = TRUE)

pca.sources
## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 101 individuals, described by 5 variables
## *The results are available in the following objects:
## 
##    name               description                          
## 1  "$eig"             "eigenvalues"                        
## 2  "$var"             "results for the variables"          
## 3  "$var$coord"       "coord. for the variables"           
## 4  "$var$cor"         "correlations variables - dimensions"
## 5  "$var$cos2"        "cos2 for the variables"             
## 6  "$var$contrib"     "contributions of the variables"     
## 7  "$ind"             "results for the individuals"        
## 8  "$ind$coord"       "coord. for the individuals"         
## 9  "$ind$cos2"        "cos2 for the individuals"           
## 10 "$ind$contrib"     "contributions of the individuals"   
## 11 "$call"            "summary statistics"                 
## 12 "$call$centre"     "mean of the variables"              
## 13 "$call$ecart.type" "standard error of the variables"    
## 14 "$call$row.w"      "weights for the individuals"        
## 15 "$call$col.w"      "weights for the variables"

Eigenvalues / variances

# examine the eigenvalues to determine the number of principal
# components to be considered

# get_eigenvalue(pca.sources): Extract the eigenvalues/variances of
# principal components

# eigenvalues measure the amount of variation retained by each
# principal component Eigenvalues are large for the first PCs and
# small for the subsequent PCs the first PCs corresponds to the
# directions with the maximum amount of variation in the data set

# An eigenvalue > 1 indicates that PCs account for more variance than
# accounted by one of the original variables in standardized data
# This is commonly used as a cutoff point for which PCs are retained
# This holds true only when the data are standardized.

eig.val <- get_eigenvalue(pca.sources)
eig.val
##       eigenvalue variance.percent cumulative.variance.percent
## Dim.1  3.1831233        63.662465                    63.66247
## Dim.2  0.7417401        14.834803                    78.49727
## Dim.3  0.5555686        11.111373                    89.60864
## Dim.4  0.2899579         5.799159                    95.40780
## Dim.5  0.2296100         4.592200                   100.00000
pca.sources <- prcomp(sources.all[, 89:93], scale. = TRUE, center = TRUE)  #everything
summary(pca.sources)
## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5
## Standard deviation     1.7841 0.8612 0.7454 0.53848 0.47918
## Proportion of Variance 0.6366 0.1484 0.1111 0.05799 0.04592
## Cumulative Proportion  0.6366 0.7850 0.8961 0.95408 1.00000

Visualize the eigenvalues

SCREE plot - how many dimensions and what percent represent the data
fviz_eig(pca.sources, addlabels = TRUE, ylim = c(0, 100))

# get_pca_ind(res.pca), get_pca_var(res.pca): Extract the results for
# individuals and variables, respectively provides a list of matrices
# containing all the results for the active variables (coordinates,
# correlation between variables and axes, squared cosine and
# contributions)
var <- get_pca_var(pca.sources)
# Coordinates - var$coord: coordinates of variables to create a
# scatter plot head(var$coord) Cos2: quality on the factor map -
# var$cos2: quality of representation for variables on the factor map
# It’s calculated as the squared coordinates: var.cos2 = var.coord *
# var.coord. head(var$cos2)
# Contributions to the principal components (in percentage) of the
# variables to the principal components The contribution of a
# variable (var) to a given principal component is (in percentage) :
# (var.cos2 * 100) / (total cos2 of the component)
head(var$contrib)
##             Dim.1       Dim.2     Dim.3      Dim.4     Dim.5
## Thr_EAAn 17.23235  0.06388974 79.835418  1.4457812  1.422557
## Ile_EAAn 16.41516 51.41054726  1.423026 29.2284646  1.522805
## Val_EAAn 21.57240 20.92071904  5.751185 12.1936299 39.562070
## Phe_EAAn 22.37120  7.08394965 11.985814 56.8465119  1.712521
## Leu_EAAn 22.40889 20.52089431  1.004557  0.2856124 55.780048

Quality of representation

# The quality of representation of the variables on factor map is
# called cos2 (square cosine, squared coordinates)

# The closer a variable is to the circle of correlations, the better
# its representation on the factor map (and the more important it is
# to interpret these components) Variables that are closed to the
# center of the plot are less important for the first components

# library('corrplot') corrplot(var$cos2, is.corr=FALSE)

# plot variables fviz_pca_var(pca.sources, col.var = 'black')

# Total cos2 of variables on Dim.1 and Dim.2
fviz_cos2(pca.sources, choice = "var", axes = 1:2)

Contributions of variables to PCs

# contributions of variables in accounting for the variability in a
# given principal component are expressed in percentage.

# Variables that do not correlated with any PC or correlated with the
# last dimensions are variables with low contribution and might be
# removed to simplify the overall analysis.

# function corrplot() [corrplot] - highlight the most contributing
# variables for each dimension library('corrplot')
# corrplot(var$contrib, is.corr=FALSE)

# Contributions of variables to PC1
fviz_contrib(pca.sources, choice = "var", axes = 1, top = 10)

# Contributions of variables to PC2
fviz_contrib(pca.sources, choice = "var", axes = 2, top = 10)

# The total contribution to PC1 and PC2 is obtained with the
# following R code:
fviz_contrib(pca.sources, choice = "var", axes = 1:2, top = 10)

# red dashed line on the graph above indicates the expected average
# contribution If the contribution of the variables were uniform, the
# expected value would be 1/length(variables) = 1/10 = 10% For a
# given component, a variable with a contribution larger than this
# cutoff could be considered as important in contributing to the
# component

PCA with ellipses

# pdf('PCA_sources-all_Group2_95-CI.pdf', width=5, height=4, paper='special')

fviz_pca_ind(pca.sources,
             axes = c(1,2),
             pointsize = 3,
             geom.ind = "point", # show points only (but not "text")
             col.ind = sources.all$Group2, 
             #palette = c("black", "darkgreen"),
             #palette = "Dark2",
             addEllipses = TRUE, # Concentration ellipses
             ellipse.level = 0.95,
             ellipse.type = "norm",
             legend.title = "Group2",
             mean.point = FALSE,
             #select.var = list(name = c("Thr_EAAn", "Ile_EAAn", "Val_EAAn", "Phe_EAAn", "Leu_EAAn")),
             #repel = TRUE,
             ggtheme = theme_classic(base_size = 12),
             title = "" 
             )

#dev.off()
fviz_pca_ind(pca.sources,
             axes = c(1,2),
             pointsize = 3,
             geom.ind = "point", # show points only (but not "text")
             col.ind = sources.all$Environment, 
             #palette = c("black", "darkgreen"),
             #palette = "Dark2",
             addEllipses = TRUE, # Concentration ellipses
             ellipse.level = 0.95,
             ellipse.type = "norm",
             legend.title = "Environment",
             mean.point = FALSE,
             ggtheme = theme_classic(base_size = 12),
             title = "" 
             )
## Warning: Removed 25 rows containing missing values (geom_point).

# pdf('PCA_sources-all_Group3_50-CI_just4viz.pdf', width=5, height=4, paper='special')

fviz_pca_ind(pca.sources,
             axes = c(1,2),
             pointsize = 3,
             geom.ind = "point", # show points only (but not "text")
             col.ind = sources.all$Group3, 
             palette = c("dodgerblue2", "cadetblue4", "azure4", "black", "chartreuse3", "cyan2", "chocolate1"), #brown1
             #palette = "Dark2",
             addEllipses = TRUE, # Concentration ellipses
             ellipse.level = 0.5,
             ellipse.type = "norm",
             legend.title = "Group3",
             mean.point = FALSE,
             ggtheme = theme_classic(base_size = 12),
             title = "" 
             )

#dev.off()
# pdf('PCA_sources-all_Group3_95-CI.pdf', width=5, height=4, paper='special')

fviz_pca_ind(pca.sources,
             axes = c(1,2),
             pointsize = 3,
             geom.ind = "point", # show points only (but not "text")
             col.ind = sources.all$Group3, 
             palette = c("dodgerblue2", "cadetblue4", "azure4", "black", "chartreuse3", "cyan2", "chocolate1"),
             #palette = "Dark2",
             addEllipses = TRUE, # Concentration ellipses
             ellipse.level = 0.95,
             ellipse.type = "norm",
             legend.title = "Group3",
             mean.point = FALSE,
             ggtheme = theme_classic(base_size = 12),
             title = "" 
             )

#dev.off()
# pdf('PCA_sources-all_Group4_50-CI_just4viz.pdf', width=5, height=4, paper='special')

fviz_pca_ind(pca.sources,
             axes = c(1,2),
             pointsize = 3,
             geom.ind = "point", # show points only (but not "text")
             col.ind = sources.all$Group4, 
             palette = c("dodgerblue2", "black", "azure4", "cadetblue4", "chartreuse3", "aquamarine2", "cyan2", "chocolate1", "brown1"), #"grey", "cornsilk3", "darkolivegreen3", 
             #palette = "Dark2",
             addEllipses = TRUE, # Concentration ellipses
             ellipse.level = 0.5,
             ellipse.type = "norm",
             legend.title = "Group4",
             mean.point = FALSE,
             ggtheme = theme_classic(base_size = 12),
             title = "" 
             )

#dev.off()

Predicting isotopic fingerprints of primary producers

Linear Discriminant Analysis (LDA)

Dimension reduction - find a linear combination of the predictors that gives maximum separation between the centers of the data while at the same time minimizing the variation within each group of data.

Uses MASS package.

Normalize data by the mean

Coral host data

# Normalize each essential AA
host.aa.d13C.ess$EAAmean = rowMeans(subset(host.aa.d13C.ess, select = c(Thr,
    Ile, Val, Leu, Phe)), na.rm = TRUE)
host.aa.d13C.ess$Thr_EAAn = host.aa.d13C.ess$Thr - host.aa.d13C.ess$EAAmean
host.aa.d13C.ess$Ile_EAAn = host.aa.d13C.ess$Ile - host.aa.d13C.ess$EAAmean
host.aa.d13C.ess$Val_EAAn = host.aa.d13C.ess$Val - host.aa.d13C.ess$EAAmean
host.aa.d13C.ess$Leu_EAAn = host.aa.d13C.ess$Leu - host.aa.d13C.ess$EAAmean
host.aa.d13C.ess$Phe_EAAn = host.aa.d13C.ess$Phe - host.aa.d13C.ess$EAAmean

LDA with POM, Plankton, Symbionts, Detritus as sources - classify the animal fractions

create blank objects to fill with data

symb.boot <- NULL
plkt.boot <- NULL
detritus.boot <- NULL
pom.boot <- NULL
# cyano.boot<-NULL diat.boot<-NULL dinofl.boot<-NULL
source.boot <- NULL

coral.host.class <- NULL

a <- NULL
b <- NULL
c <- NULL
d <- NULL

auto <- NULL
hetero <- NULL
a_mean <- NULL
h_mean <- NULL

dist <- NULL

coral.host.prop <- NULL
prop <- NULL
source.prop <- NULL

LDA

# The prior argument sets the prior probabilities of class
# membership. If unspecified, the class proportions for the training
# set are used. If present, the probabilities should be specified in
# the order of the factor levels.

## Calculate an error rate for our LDA model - using source dataset
## run an LDA with a jacknifing model fit, to look at error rate the
## line 'CV = TRUE' makes the LDA do jacknifed (leave-one-out
## cross-validation) model fit
group.lda.norm <- lda(Group ~ Ile_EAAn + Leu_EAAn + Phe_EAAn + Thr_EAAn +
    Val_EAAn, data = sources.all, CV = TRUE)

classification of the LDA model to the sources

# create a table which compares the classification of the LDA model
# to the sources
ct.prod.norm <- table(sources.all$Group, group.lda.norm$class)
ct.prod.norm
##           
##            Detritus Plankton POM Symbiont
##   Detritus       14        0   0        0
##   Plankton        1        6   8        0
##   POM             0        4  43        5
##   Symbiont        0        0  15        5

Overall, Plankton and Symbiont did not classify particularly well - Plankton samples are highly heterogeneous, and likely contain phytoplankton as well (or otherwise plankton feeds on phytoplankton) - Coral Symbionts are dinoflagellates and thus makes sense they group with POM dinoflagellates

Looking at more specific breakdown of Plankton (Phytoplankton vs. Zooplankton)

ct.prod.norm.2 <- table(sources.all$Group2, group.lda.norm$class)
ct.prod.norm.2
##                
##                 Detritus Plankton POM Symbiont
##   Detritus            14        0   0        0
##   Phytoplankton        0        4  43        5
##   Symbiont             0        0  15        5
##   Zooplankton          1        6   8        0

Further detail

ct.prod.norm.3 <- table(sources.all$Group3, group.lda.norm$class)
ct.prod.norm.3
##                               
##                                Detritus Plankton POM Symbiont
##   Detritus                           14        0   0        0
##   Phytoplankton-Diatom                0        0  15        0
##   Phytoplankton-Dinoflagellate        0        0   8        4
##   Phytoplankton-field                 0        4  20        1
##   Symbiont                            0        0  10        5
##   Symbiont-Shallow-cave               0        0   5        0
##   Zooplankton                         1        6   8        0

Even more detail

ct.prod.norm.4 <- table(sources.all$Group4, group.lda.norm$class)
ct.prod.norm.4
##                               
##                                Detritus Plankton POM Symbiont
##   Detritus                           14        0   0        0
##   Phytoplankton                       0        4  20        1
##   Phytoplankton-Diatom                0        0  15        0
##   Phytoplankton-Dinoflagellate        0        0   8        4
##   Symbiont-Mesophotic                 0        0   5        5
##   Symbiont-Shallow                    0        0   5        0
##   Symbiont-Shallow-cave               0        0   5        0
##   Zooplankton                         1        6   8        0
# total percent of samples correctly classified is the sum of the
# diagonal of this table
sum(diag(prop.table(ct.prod.norm)))
## [1] 0.6732673
sum(diag(prop.table(ct.prod.norm.2)))
## [1] 0.3267327
sum(diag(prop.table(ct.prod.norm.3)))
## [1] 0.2277228
sum(diag(prop.table(ct.prod.norm.4)))
## [1] 0.3663366

In situ data with phytoplankton culture [1] 0.6435644 [1] 0.3168317 [1] 0.3267327

In situ data only [1] 0.5405405 [1] 0.5675676 [1] 0.2567568

# what % of each species is being correctly classified
diag(prop.table(ct.prod.norm, 1))
##  Detritus  Plankton       POM  Symbiont 
## 1.0000000 0.4000000 0.8269231 0.2500000
diag(prop.table(ct.prod.norm.2, 1))
## [1] 1.00000000 0.07692308 0.75000000 0.00000000
diag(prop.table(ct.prod.norm.3, 1))
## [1] 1.0000000 0.0000000 0.6666667 0.0400000
diag(prop.table(ct.prod.norm.4, 1))
## [1] 1.0000000 0.1600000 1.0000000 0.3333333

In situ data with phytoplankton culture Detritus Plankton POM Symbiont 1.0000000 0.2666667 0.8076923 0.2500000 1.00000000 0.07692308 0.60000000 0.25000000 1.0000000 0.0000000 1.0000000 0.3333333

In situ data only Detritus Plankton POM Symbiont 1.00000000 0.06666667 0.56000000 0.55000000 1.0000000 0.2400000 0.7333333 0.5500000 1.0000000 0.0000000 0.3636364 0.1250000

sessionInfo()
## R version 4.1.1 (2021-08-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS High Sierra 10.13.4
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] MASS_7.3-54      FactoMineR_2.4   factoextra_1.0.7 ggplot2_3.3.5   
## [5] plyr_1.8.6       dplyr_1.0.7     
## 
## loaded via a namespace (and not attached):
##  [1] ggrepel_0.9.1        Rcpp_1.0.7           lattice_0.20-45     
##  [4] tidyr_1.1.4          assertthat_0.2.1     digest_0.6.28       
##  [7] utf8_1.2.2           cellranger_1.1.0     R6_2.5.1            
## [10] backports_1.2.1      evaluate_0.14        highr_0.9           
## [13] pillar_1.6.3         rlang_0.4.11         readxl_1.3.1        
## [16] curl_4.3.2           rstudioapi_0.13      data.table_1.14.2   
## [19] car_3.0-11           jquerylib_0.1.4      DT_0.23             
## [22] rmarkdown_2.11       labeling_0.4.2       stringr_1.4.0       
## [25] foreign_0.8-81       htmlwidgets_1.5.4    munsell_0.5.0       
## [28] broom_0.7.9          compiler_4.1.1       xfun_0.26           
## [31] pkgconfig_2.0.3      htmltools_0.5.4      flashClust_1.01-2   
## [34] tidyselect_1.1.1     tibble_3.1.5         rio_0.5.27          
## [37] fansi_0.5.0          crayon_1.4.1         withr_2.4.2         
## [40] ggpubr_0.4.0         leaps_3.1            grid_4.1.1          
## [43] jsonlite_1.7.2       gtable_0.3.0         lifecycle_1.0.1     
## [46] DBI_1.1.1            magrittr_2.0.1       formatR_1.11        
## [49] scales_1.1.1         zip_2.2.0            carData_3.0-4       
## [52] cli_3.0.1            stringi_1.7.5        cachem_1.0.6        
## [55] farver_2.1.0         ggsignif_0.6.3       scatterplot3d_0.3-41
## [58] bslib_0.4.2          ellipsis_0.3.2       generics_0.1.0      
## [61] vctrs_0.3.8          openxlsx_4.2.4       forcats_0.5.1       
## [64] tools_4.1.1          glue_1.4.2           purrr_0.3.4         
## [67] hms_1.1.1            abind_1.4-5          fastmap_1.1.0       
## [70] yaml_2.2.1           colorspace_2.0-2     cluster_2.1.2       
## [73] rstatix_0.7.0        haven_2.4.3          knitr_1.36          
## [76] sass_0.4.4